vignettes/example-gallery-08-interactive-charts.Rmd
example-gallery-08-interactive-charts.RmdThis document is adapted from the Bar Charts section of the Altair Example Gallery.
Our first step is to set up our environment:
# devtools::install_github("vegawidget/altair")
library("altair")
library("tibble")
library("dplyr")##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
This is an example of using an interval selection to control the color of points across multiple facets.
## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
brush <- alt$selection(type = "interval", resolve="global")
base <-
alt$Chart(r_to_py(vega_data$cars()))$
mark_point()$
encode(
y="Miles_per_Gallon",
color=alt$condition(brush, "Origin", alt$ColorValue("gray"))
)$
properties(selection = brush, width = 250, height=250)
chart <- (base$encode(x = "Horsepower") | base$encode( x = "Acceleration"))
chartThe plot below uses an interval selection, which causes the chart to include an interactive brush (shown in grey). The brush selection parameterizes the red guideline, which visualizes the average value within the selected interval.
## Observations: 1,461
## Variables: 6
## $ date <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04,...
## $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1...
## $ temp_max <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4...
## $ temp_min <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6...
## $ wind <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4...
## $ weather <chr> "drizzle", "rain", "rain", "rain", "rain", "rain...
weather <- r_to_py(weather)
brush <- alt$selection(type = "interval", encodings = list("x"))
bars <-
alt$Chart()$
mark_bar()$
encode(
x = alt$X("date:O", timeUnit="month"),
y = "mean(precipitation):Q",
opacity = alt$condition(brush, alt$OpacityValue(1), alt$OpacityValue(0.7))
)$
properties(selection = brush)
line <-
alt$Chart()$
mark_rule(color="firebrick")$
encode(
y = "mean(precipitation):Q",
size = alt$SizeValue(3)
)$
transform_filter(brush$ref())
chart <- alt$layer(bars, line, data=weather)
chartThis example shows an interactive chart where selections in one portion of the chart affect what is shown in other panels. Click on the bar chart to see a detail of the distribution in the upper panel.
## Observations: 3,201
## Variables: 16
## $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary...
## $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "...
## $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "F...
## $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8,...
## $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 3...
## $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R",...
## $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "D...
## $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 100000...
## $ Release_Date <list> ["12-Jun-98", "7-Aug-98", "28-Aug-98",...
## $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 2...
## $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
## $ Source <list> [NULL, NULL, NULL, NULL, "Original Scr...
## $ Title <list> ["The Land Girls", "First Love, Last R...
## $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
## $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819,...
## $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521,...
pts <- alt$selection(type = "single", encodings = list("x"))
rect <-
alt$Chart(r_to_py(movies))$
mark_rect()$
encode(
x = alt$X("IMDB_Rating:Q", bin = TRUE),
y = alt$Y("Rotten_Tomatoes_Rating:Q", bin=TRUE),
color = alt$Color(
"count(IMDB_Rating)",
scale = alt$Scale(scheme = "greenblue"),
legend = alt$Legend(title = "Total Records")
)
)
circ <-
rect$
mark_point()$
encode(
color = alt$ColorValue("grey"),
size = alt$Size(
"count(IMDB_Rating)",
legend = alt$Legend(title = "Records in Selection")
)
)$
transform_filter(pts$ref())
bar <-
alt$Chart(r_to_py(movies))$
mark_bar()$
encode(
x = "Major_Genre:N",
y = "count(Major_Genre)",
color = alt$condition(pts, alt$ColorValue("steelblue"), alt$ColorValue("grey"))
)$
properties(selection = pts, width = 550, height = 200)
chart <-
((rect + circ) & bar)$
resolve_legend(color = "independent", size = "independent")
chartThis example shows a multi-panel view of the same data, where you can interactively select a portion of the data in any of the panels to highlight that portion in any of the other panels.
Note: alt$repeat() must be translated to alt$\x60repeat\x60().
## Observations: 2,000
## Variables: 5
## $ date <dttm> 2001-01-14 21:55:00, 2001-03-26 20:15:00, 2001-03...
## $ delay <dbl> 0, -11, -3, 12, 2, 47, 3, -4, 4, 0, 18, -7, -10, 2...
## $ destination <chr> "SMF", "SLC", "LAX", "SNA", "LAX", "AUS", "PHX", "...
## $ distance <dbl> 480, 507, 714, 342, 373, 189, 872, 723, 318, 487, ...
## $ origin <chr> "SAN", "PHX", "ELP", "SJC", "SMF", "DAL", "AUS", "...
brush <- alt$selection_interval(encodings = list("x"))
# Define the base chart, with the common parts of the
# background and highlights
base <-
alt$Chart()$
mark_bar()$
encode(
x = alt$X(
alt$`repeat`("column"),
type = "quantitative",
bin = alt$Bin(maxbins=20)
),
y = "count(delay)"
)$
properties(width = 180, height = 130)
# blue background with selection
background <- base$properties(selection = brush)
# yellow highlights on the transformed data
highlight <-
base$
encode(
color = alt$value("goldenrod")
)$
transform_filter(brush$ref())
# layer the two charts & repeat
chart <-
(background + highlight)$
properties(data = r_to_py(flights))$
transform_calculate("time", "hours(datum.date)")$
`repeat`(column = list("distance", "delay", "time"))
chartThis example shows how to add a simple rectangular brush to a scatter plot. By clicking and dragging on the plot, you can highlight points within the range.
## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
This multi-line chart uses an invisible Voronoi tessellation to handle mouseover to identify the nearest point and then highlight the line on which the point falls. It is adapted from the Vega-Lite example.
## Observations: 560
## Variables: 3
## $ symbol <chr> "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT",...
## $ date <dttm> 2000-01-01, 2000-02-01, 2000-03-01, 2000-04-01, 2000-0...
## $ price <dbl> 39.81, 36.35, 43.22, 28.37, 25.45, 32.54, 28.40, 28.40,...
highlight <-
alt$selection_single(
on = "mouseover",
fields = list("symbol"),
nearest = TRUE
)
base <-
alt$Chart(r_to_py(vega_data$stocks()))$
encode(
x = "date:T",
y = "price:Q",
color = "symbol:N"
)
points <-
base$mark_circle()$
encode(
opacity = alt$value(0)
)$
properties(selection = highlight, width = 600)
lines <-
base$
mark_line()$
encode(
size = alt$condition(highlight, alt$value(3), alt$value(1))
)
chart <- (points + lines)
chartThis example shows how you can use selections and layers to create a multi-line tooltip that tracks the
xposition of the cursor.To find the x-position of the cursor, we employ a little trick: we add some transparent points with only an
xencoding (noyencoding) and tie a nearest selection to these, tied to thexfield.
Definition
## Observations: 300
## Variables: 3
## $ category <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"...
## $ x <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ y <dbl> 1.37, 0.81, 1.17, 1.80, 2.20, 2.09, 3.60, 3.51, 5.53,...
# Create a selection that chooses the nearest point & selects based on x-value
nearest <- alt$selection(
type = "single",
nearest = TRUE,
on = "mouseover",
fields = list("x"),
empty = "none"
)
# The basic line
line <-
alt$Chart()$
mark_line(interpolate = "basis")$
encode(
x = "x:Q",
y = "y:Q",
color = "category:N"
)
# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors <-
alt$Chart()$
mark_point()$
encode(
x = "x:Q",
opacity = alt$value(0)
)$
properties(selection = nearest)$
copy()
# Draw points on the line, and highlight based on selection
points <-
line$
mark_point()$
encode(
opacity = alt$condition(nearest, alt$value(1), alt$value(0))
)
# Draw text labels near the points, and highlight based on selection
text <-
line$
mark_text(align = "left", dx = 5, dy = -5)$
encode(
text = alt$condition(nearest, "y:Q", alt$value(" "))
)
# Draw a rule at the location of the selection
rules <-
alt$Chart()$
mark_rule(color = "gray")$
encode(
x = "x:Q"
)$
transform_filter(nearest$ref())
# Put the five layers into a chart and bind the data
chart <-
(line + selectors + points + rules + text)$
properties(data = r_to_py(data), width = 600, height = 300)
chartThis chart provides an interactive exploration of Seattle weather over the course of the year. It includes a one-axis brush selection to easily see the distribution of weather types in a particular date range.
## Observations: 1,461
## Variables: 6
## $ date <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04,...
## $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1...
## $ temp_max <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4...
## $ temp_min <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6...
## $ wind <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4...
## $ weather <chr> "drizzle", "rain", "rain", "rain", "rain", "rain...
scale <- alt$Scale(
domain = list("sun", "fog", "drizzle", "rain", "snow"),
range = list("#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd")
)
color <- alt$Color("weather:N", scale = scale)
# We create two selections:
# - a brush that is active on the top panel
# - a multi-click that is active on the bottom panel
brush <- alt$selection_interval(encodings = list("x"))
click <- alt$selection_multi(encodings = list("color"))
# Top panel is scatter plot of temperature vs time
points <-
alt$Chart()$
mark_point()$
encode(
x = alt$X(
"date:T",
timeUnit = "monthdate",
axis=alt$Axis(title="Date")
),
alt$Y(
"temp_max:Q",
axis = alt$Axis(title = "Maximum Daily Temperature (C)"),
scale = alt$Scale(domain = list(-5, 40))
),
color = alt$condition(brush, color, alt$value("lightgray")),
size = alt$Size("precipitation:Q", scale = alt$Scale(range = list(5, 200)))
)$
properties(width = 600, height = 300, selection = brush)$
transform_filter(click$ref())
# Bottom panel is a bar chart of weather type
bars <-
alt$Chart()$
mark_bar()$
encode(
x = "count(weather)",
y = "weather:N",
color = alt$condition(click, color, alt$value("lightgray"))
)$
transform_filter(brush$ref())$
properties(width = 600, selection = click)
chart <-
(points & bars)$
properties(
data = r_to_py(vega_data$seattle_weather()),
title = "Seattle Weather: 2012-2015"
)
chartThis example shows a selection that links two views of data: the left panel contains one point per object, and the right panel contains one line per object. Clicking on either the points or lines will select the corresponding objects in both views of the data.
The challenge lies in expressing such hierarchical data in a way that Altair can handle. We do this by merging the data into a “long form” dataframe, and aggregating identical metadata for the final plot.
Definition
n_objects <- 20
n_times <- 50
# Create one (x, y) pair of metadata per object
locations <-
tibble(
id = seq(1, n_objects),
x = rnorm(n_objects),
y = rnorm(n_objects)
)
# Create a 50-element time-series for each object
timeseries <-
crossing(
id = seq(1, n_objects),
time = seq(1, n_times)
) %>%
mutate(value = rnorm(n())) %>%
group_by(id) %>%
mutate(value = cumsum(value)) %>%
ungroup()
# Merge the (x, y) metadata into the long-form view
data <- left_join(timeseries, locations, by = "id")## Observations: 1,000
## Variables: 5
## $ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ time <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ value <dbl> -0.02142707, 0.64907101, 0.21445397, -0.89942582, -0.292...
## $ x <dbl> -0.004620768, -0.004620768, -0.004620768, -0.004620768, ...
## $ y <dbl> -0.4845954, -0.4845954, -0.4845954, -0.4845954, -0.48459...
selector <- alt$selection_single(empty = "all", fields = list("id"))
points <-
alt$Chart(r_to_py(data))$
mark_point(filled = TRUE, size = 200)$
encode(
x = "mean(x)",
y = "mean(y)",
color = alt$condition(
selector,
"id:O",
alt$value("lightgray"),
legend = NULL
)
)$
properties(selection = selector, width = 250, height = 250)$
interactive()$
copy()
timeseries <-
alt$Chart(r_to_py(data))$
mark_line()$
encode(
x = "time",
y = alt$Y("value", scale = alt$Scale(domain = c(-15, 15))),
color = alt$Color("id:O", legend = NULL)
)$
transform_filter(selector)$
properties(selection = selector, width = 250, height = 250)
points | timeseriesThis chart shows an example of using an interval selection to filter the contents of an attached histogram, allowing the user to see the proportion of items in each category within the selection.
TODO: find a way to fix the numerical scale of the bar chart
## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
cars <- r_to_py(vega_data$cars())
brush <- alt$selection(type="interval")
points <-
alt$Chart()$
mark_point()$
encode(
x = "Horsepower:Q",
y = "Miles_per_Gallon:Q",
color = alt$condition(brush, "Origin:N", alt$value("lightgray"))
)$
properties(selection = brush)
bars <-
alt$Chart()$
mark_bar()$
encode(
x = "count(Origin):Q",
y = "Origin:N",
color = "Origin:N"
)$
transform_filter(brush$ref())
chart <- alt$vconcat(points, bars, data = cars)
chart## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time.
## Observations: 570
## Variables: 4
## $ age <dbl> 0, 0, 5, 5, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35,...
## $ people <dbl> 1483789, 1450376, 1411067, 1359668, 1260099, 1216114, 1...
## $ sex <dbl> 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1...
## $ year <dbl> 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1...
pop <- r_to_py(vega_data$population())
pink_blue <-
alt$Scale(
domain = list("Male", "Female"),
range = list("steelblue", "salmon")
)
slider <- alt$binding_range(min = 1900, max = 2000, step = 10)
year <- alt$selection_single(
name = "year",
fields = list("year"),
bind = slider
)
chart <-
alt$Chart(pop)$
mark_bar()$
encode(
x = alt$X("sex:N", axis = alt$Axis(title = NULL)),
y = alt$Y("people:Q", scale = alt$Scale(domain = c(0, 1.2e7))),
color = alt$Color("sex:N", scale = pink_blue),
column = "age:O"
)$
properties(width = 20, selection = year)$
transform_calculate("sex", JS('if(datum.sex == 1, "Male", "Female")'))$
transform_filter(year$ref())
chart